
#Describe our data

library(tidyverse)
library(readr)
library(fuzzyjoin)
library(car)
library(data.table)


# Set the R working directory to the shared project folder before running;
# every file path below is relative to it.


#BREAK OUT PARTISAN ISSUE AREA FOR NOMINATIONS AND JAN 6
our_bills<-fread("Data/racial_opinion_gaps_edited.csv")
our_bills$Bill_topic_partisan <- ifelse(our_bills$`If include ""partisan"" category, items included`==1, "Partisan",
                                        our_bills$Bill_topic)
our_bills$`Jacob's Proposed Issue Areas Partisan` <- ifelse(our_bills$`If include ""partisan"" category, items included`==1, "Partisan",
                                                            our_bills$`Jacob's Proposed Issue Areas`)

our_bills$Topic <- our_bills$`Jacob's Proposed Issue Areas`
our_bills$id <- "CES"
our_bills<-our_bills[,c("Topic", "id")]

all_bills <- fread("Intermediate Data/Congressional Agenda/Final Datasets and code/US-Legislative-congressional_bills_19.3_3_2 (1).csv")
new_codes <-readxl::read_xlsx("Intermediate Data/Congressional Agenda/Issue Coding Template.xlsx")
colnames(new_codes)<-c("majortopic", "Cap Code Name", "Jacob's Proposed Issue Areas")
all_bills <- merge(all_bills, new_codes, by="majortopic", all.x = T)
all_bills$Topic <- all_bills$`Jacob's Proposed Issue Areas`
all_bills$id <- NULL
all_bills$id <- "All Bills"
all_bills<-all_bills[,c("Topic", "id")]

cq_bills <- fread("Intermediate Data/Congressional Agenda/Final datasets and code/CQ (kuriwaki) topic codes.csv")
cq_bills$majortopic<-cq_bills$`major topic`
cq_bills <- merge(cq_bills, new_codes, by="majortopic", all.x = T)
cq_bills$Topic <- cq_bills$`Jacob's Proposed Issue Areas`
cq_bills$id <- "CQ"
cq_bills<-cq_bills[,c("Topic", "id")]

public_laws <- fread("Intermediate Data/Congressional Agenda/Final Datasets and code/US-Legislative-congressional_bills_19.3_3_2 (1).csv")
#new_codes <-readxl::read_xlsx("Intermediate Data/public_laws_topics.xlsx")
colnames(new_codes)<-c("majortopic", "Cap Code Name", "Jacob's Proposed Issue Areas")
public_laws <- merge(public_laws, new_codes, by="majortopic", all.x = T)
public_laws$Topic <- public_laws$`Jacob's Proposed Issue Areas`
public_laws$id <- NULL
public_laws$id <- "Public Laws"
public_laws<-public_laws[,c("Topic", "id")]

#currylee
currylee_dem <- fread("Intermediate Data/Congressional Agenda/Final Datasets and code/Curry-Lee Democratic Party Priorities.csv")
currylee_dem$majortopic <- currylee_dem$Majortopic
currylee_dem <- merge(currylee_dem, new_codes, by="majortopic", all.x = T)
currylee_dem$Topic <- currylee_dem$`Jacob's Proposed Issue Areas`
#currylee_dem$id <- NULL
currylee_dem$id <- "Curry Lee (Dems)"
currylee_dem<-currylee_dem[,c("Topic", "id")]

#currylee
currylee_gop <- fread("Intermediate Data/Congressional Agenda/Final Datasets and code/Curry-Lee Republican Party Priorities.csv")
currylee_gop$majortopic <- currylee_gop$Majortopic
currylee_gop <- merge(currylee_gop, new_codes, by="majortopic", all.x = T)
currylee_gop$Topic <- currylee_gop$`Jacob's Proposed Issue Areas`
#currylee_gop$id <- NULL
currylee_gop$id <- "Curry Lee (Reps)"
currylee_gop<-currylee_gop[,c("Topic", "id")]

#mayhew
mayhew_bills <- fread("Intermediate Data/Congressional Agenda/Final Datasets and code/Mayhew major topic codes.csv")
mayhew_bills$majortopic <- mayhew_bills$`major topic`
mayhew_bills <- merge(mayhew_bills, new_codes, by="majortopic", all.x = T)
mayhew_bills$Topic <- mayhew_bills$`Jacob's Proposed Issue Areas`
#mayhew_bills$id <- NULL
mayhew_bills$id <- "Mayhew Enactments"
mayhew_bills<-mayhew_bills[,c("Topic", "id")]


combined_df <- rbind(our_bills, all_bills, cq_bills, public_laws, currylee_dem, currylee_gop, mayhew_bills)
freqs<-combined_df %>% group_by(id, Topic) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n)) %>% filter(!is.na(Topic) & Topic!="Partisan Politics (Nominations, Impeachments, Jan 6)")

ggplot(freqs, aes(x = Topic,y = freq, fill = id)) +
 # scale_fill_manual(values=c("red", "black", "blue", "green"), name="Data") + 
  geom_col(position=position_dodge()) + 
  #scale_y_continuous(labels = scales::percent) +
  labs(title = "Topic Distribution of All Bills", x = "Topics", y= "Frequency (%)", scale="Data") +
  theme(axis.text.x = element_text(angle = 90), text = element_text(size=15))  

freqs %>% filter(id%in%c("All Bills", "CES")) %>%
ggplot(aes(x = Topic,y = freq, fill = id)) +
  scale_fill_manual(values=c("red", "black"), name="Data") + 
  geom_col(position=position_dodge()) +
  #scale_y_continuous(labels = scales::percent) +
  labs(title = "Topic Distribution of All Bills", x = "Topics", y= "Frequency (%)", scale="Data") +
  theme(axis.text.x = element_text(angle = 90), text = element_text(size=15))  

####Regression plots
#reshape(freqs[c(1,2,4)], idvar = "id", timevar = "Topic", direction = "wide")
wide<-spread(freqs[c(1,2,4)], id, freq)

wide$order <- wide$CES - wide$`All Bills`

all<-wide %>%
  mutate(diff = CES - `All Bills`,
         Topic = fct_reorder(Topic, desc(order))) %>%
  ggplot(aes(x = diff, y=Topic)) + 
  geom_point() + geom_vline(xintercept = 0) +
  xlab("Difference (proportion)") + ylab("Issue Area") + ggtitle("CES vs All Bills") +
  scale_y_discrete(labels=scales::label_wrap(15))+theme_bw()
cq<-wide %>% filter(!is.na(CQ)) %>%
  mutate(diff = CES - `CQ`,
         Topic = fct_reorder(Topic, desc(order))) %>%
  ggplot(aes(x = diff, y=Topic)) + 
  geom_point() + geom_vline(xintercept = 0) +
  xlab("Difference (proportion)") + ylab("Issue Area") +ggtitle("CES vs CQ Major \nIssues") +
  scale_y_discrete(labels=scales::label_wrap(15))+theme_bw()
public<-wide %>%
  mutate(diff = CES - `Public Laws`,
         Topic = fct_reorder(Topic, desc(order))) %>%
  ggplot(aes(x = diff, y=Topic)) + 
  geom_point() + geom_vline(xintercept = 0) +
  xlab("Difference (proportion)") + ylab("Issue Area") +ggtitle("CES vs Public Laws") +
  scale_y_discrete(labels=scales::label_wrap(15)) +theme_bw()

currylee_dem<-wide %>%
  mutate(diff = CES - `Curry Lee (Dems)`,
         Topic = fct_reorder(Topic, desc(order))) %>%
  ggplot(aes(x = diff, y=Topic)) + 
  geom_point() + geom_vline(xintercept = 0) +
  xlab("Difference (proportion)") + ylab("Issue Area") +ggtitle("CES vs Curry & Lee \n(Dems)") +
  scale_y_discrete(labels=scales::label_wrap(15)) +theme_bw()
currylee_gop<-wide %>%
  mutate(diff = CES - `Curry Lee (Reps)`,
         Topic = fct_reorder(Topic, desc(order))) %>%
  ggplot(aes(x = diff, y=Topic)) + 
  geom_point() + geom_vline(xintercept = 0) +
  xlab("Difference (proportion)") + ylab("Issue Area") +ggtitle("CES vs Curry & Lee \n(Reps)") +
  scale_y_discrete(labels=scales::label_wrap(15)) +theme_bw()
mayhew<-wide %>%
  mutate(diff = CES - `Mayhew Enactments`,
         Topic = fct_reorder(Topic, desc(order))) %>%
  ggplot(aes(x = diff, y=Topic)) + 
  geom_point() + geom_vline(xintercept = 0) +
  xlab("Difference (proportion)") + ylab("Issue Area") +ggtitle("CES vs Mayhew \nEnactments") +
  scale_y_discrete(labels=scales::label_wrap(15)) +theme_bw()
ggpubr::ggarrange(all, cq, public, ncol=3, nrow=1)
ggsave("Figures/AgendaMainText.png", width=10, height=5)
ggpubr::ggarrange(currylee_dem, currylee_gop, mayhew, ncol=3, nrow=1)
ggsave("Figures/AgendaSI.png", width=10, height=5)
